Importing library¶

In [15]:
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet_preprocess_input
from tensorflow.keras.applications import ResNet50

from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input as vgg_preprocess_input

from tensorflow.keras.applications.efficientnet import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input as efficientnet_preprocess_input

from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model 

import matplotlib.pyplot as plt
import pathlib
import cv2
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import tensorflow_addons as tfa 
import os, sys
import matplotlib.pyplot as plt
import numpy as np
from bokeh.plotting import figure, show, output_notebook, output_file
from bokeh.models import HoverTool, ColumnDataSource
from bokeh.models import HoverTool, ColumnDataSource, ImageURL
from bokeh.transform import linear_cmap
from bokeh.palettes import Category10
from bokeh.models import  LinearColorMapper
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, ColumnDataSource, LegendItem, Legend
from bokeh.palettes import Category10

class DataLoader:
    """Loads an image-classification dataset (one sub-directory per class)
    as tf.data datasets and provides simple visualization helpers.

    Exposes:
        data          -- unshuffled dataset (stable file order, aligned with `labels`)
        shuffled_data -- shuffled view of the same files, used only for previews
        labels        -- np.ndarray of integer labels in `data` order
        class_names   -- list of class (directory) names
    """

    def __init__(self, dataset_path, image_size=(224, 224), batch_size=32):

        # Path to the dataset (one sub-directory per class).
        self.dataset_path = pathlib.Path(dataset_path)
        self.batch_size = batch_size
        self.image_size = image_size

        # Kept for backward compatibility; the tf.data pipeline below does
        # not use this generator.
        self.datagen = ImageDataGenerator(rescale=1./255,)

        # Unshuffled so that batch order stays aligned with file_paths and
        # with the label array computed below.
        self.data = image_dataset_from_directory(self.dataset_path,
                                                seed=42,
                                                image_size=self.image_size,
                                                labels='inferred',
                                                shuffle=False,
                                                batch_size=self.batch_size,)
        # Shuffled copy for visual previews; HiddenPrints suppresses the
        # duplicate "Found N files..." banner.
        with HiddenPrints():
            self.shuffled_data = image_dataset_from_directory(self.dataset_path,
                                                seed=42,
                                                image_size=self.image_size,
                                                labels='inferred',
                                                shuffle=True,
                                                batch_size=self.batch_size,)
        # FIX: the original did `self.labels = self.labels()`, which replaced
        # the bound method `labels` with its own return value (the method
        # could never be called again). The computation now lives in a
        # private helper, so no shadowing occurs.
        self.labels = self._compute_labels()
        self.class_names = self.data.class_names

    def class_labels(self):
        """Return the class (directory) names as a list of strings."""
        return list(self.data.class_names)

    def _compute_labels(self):
        """Iterate the unshuffled dataset once and collect every integer
        label into a single numpy array (same order as `self.data`)."""
        labels = []
        for images, label_batch in self.data:
            labels.extend(label_batch.numpy())
        return np.array(labels)

    # Function to visualize some images
    def visualize(self, num_samples=5):
        """Show `num_samples` images from one shuffled batch in a single row.

        NOTE: assumes num_samples <= batch_size; pixels arrive in [0, 255]
        and are rescaled for imshow.
        """
        plt.figure(figsize=(10, 8))
        for images, labels in self.shuffled_data.take(1):
            for i in range(num_samples):
                plt.subplot(1, num_samples, i + 1)
                plt.imshow(images[i] / 255.0)
                plt.title(f'{self.class_labels()[labels[i]]}')

        plt.tight_layout()
        plt.show()

    def visualize_grid(self, num_samples=5):
        """Show up to `num_samples` images from one shuffled batch,
        arranged in rows of five."""
        num_rows = (num_samples + 4) // 5  # ceil(num_samples / 5)

        plt.figure(figsize=(10, 8))

        for images, labels in self.shuffled_data.take(1):
            for row in range(num_rows):
                for i in range(5):
                    index = row * 5 + i
                    if index >= len(images):
                        break  # no more images in this batch
                    plt.subplot(num_rows, 5, index + 1)
                    plt.imshow(images[index] / 255.0)
                    plt.title(f'{self.class_labels()[labels[index]]}')

        plt.tight_layout()
        plt.show()

    def visualize_class_distribution(self):
        """Bar chart of how many images each class contains, with a
        color-coded legend."""
        plt.figure(figsize=(10, 6))

        labels = self.labels
        class_names = self.class_names

        # Count occurrences of each class label.
        class_counts = {class_name: np.sum(labels == idx) for idx, class_name in enumerate(class_names)}

        # One distinct color per class; indexing the listed colormap directly
        # avoids the deprecated plt.cm.get_cmap(name, lut) call.
        cmap = plt.cm.tab20
        colors = [cmap(i % cmap.N) for i in range(len(class_names))]

        plt.bar(class_counts.keys(), class_counts.values(), color=colors)
        plt.xlabel('Class Label')
        plt.ylabel('Count')
        plt.title('Class Distribution')

        # Legend mapping colors back to class names.
        handles = [plt.Rectangle((0, 0), 1, 1, color=colors[i], ec="k", label=class_name) for i, class_name in enumerate(class_names)]
        plt.legend(handles=handles, title='Class Names', loc='upper right')

        plt.show()


class HiddenPrints:
    """Context manager that silences stdout for the duration of its body.

    On entry, sys.stdout is swapped for a writer on os.devnull; on exit the
    devnull stream is closed and the original stream is restored, so prints
    made after the block behave normally again.
    """

    def __enter__(self):
        # Remember the real stream, then point stdout at the bit bucket.
        self._original_stdout, sys.stdout = sys.stdout, open(os.devnull, 'w')

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore the real stream first, then release the devnull handle.
        devnull_stream = sys.stdout
        sys.stdout = self._original_stdout
        devnull_stream.close()


class CustomModel:
    """Wraps a pretrained Keras backbone (ResNet50, VGG19 or EfficientNetB0)
    as a global-average-pooled feature extractor and maps the matching
    preprocess_input function over the data loader's dataset."""

    # Constructor for CustomModel: stores configuration, builds the backbone
    # and attaches the correct preprocessing to self.data.
    def __init__(self, model_name, data_loader, weights="imagenet", include_top=False):
        self.model_name = model_name
        self.weights = weights
        self.include_top = include_top
        self.data = data_loader.data
        self.class_names = data_loader.class_names
        self.model = self.initialize_model()
        self.features = None  # filled by extract_features()
        self.data_loader = data_loader

    # Initialize a model with custom parameters like model type, weights etc.
    def initialize_model(self):
        """Build the requested backbone topped with GlobalAveragePooling2D
        and map the backbone-specific preprocess_input over self.data.

        Returns:
            A keras Model producing pooled feature vectors per image.

        Raises:
            ValueError: if self.model_name is not a supported backbone
                        (the original silently returned None, which caused
                        a confusing failure later at predict time).
        """
        # Backbone constructors paired with their preprocessing functions;
        # built at call time so the heavy TF names are only resolved here.
        backbones = {
            'resnet50': (ResNet50, resnet_preprocess_input),
            'vgg19': (VGG19, vgg_preprocess_input),
            'efficientnet': (EfficientNetB0, efficientnet_preprocess_input),
        }
        if self.model_name not in backbones:
            raise ValueError(f"Unsupported model_name: {self.model_name!r}; "
                             f"expected one of {sorted(backbones)}")

        constructor, preprocess = backbones[self.model_name]
        base_model = constructor(weights=self.weights, include_top=self.include_top)
        output = GlobalAveragePooling2D()(base_model.output)
        model = Model(inputs=base_model.input, outputs=output)
        self.data = self.data.map(lambda x, y: (preprocess(x), y))
        return model

    # For extracting features from custom model, common for all models
    def extract_features(self):
        """Run the backbone over the whole dataset and cache the pooled
        feature matrix in self.features (shape: n_images x feature_dim)."""
        features = self.model.predict(self.data)
        self.features = features
        print(f"{self.model_name} extracted feature space size :", self.features.shape) 
         

from sklearn.manifold import TSNE
from sklearn.manifold import MDS
from bokeh.models import ColumnDataSource, CustomJSHover, HoverTool
from bokeh.plotting import output_file, save
import umap
from sklearn.cluster import SpectralClustering, KMeans
import hdbscan
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, fowlkes_mallows_score,homogeneity_completeness_v_measure
class FeatureSpaceVisualizer:
    """Projects a model's extracted feature space to 2-D (t-SNE, MDS or
    UMAP), plots the embedding, and runs clustering algorithms (spectral,
    k-means, HDBSCAN) whose labels are scored against the ground truth."""

    def __init__(self, model, reduction_method = 'tsne'):
        # `model` is expected to be a CustomModel whose extract_features()
        # has already been called (features must not be None).
        self.original_feature_space = model.features
        self.data = model.data
        self.model_name = model.model_name
        self.class_names = model.class_names
        # One of 'tsne', 'mds' or 'umap'; any other value leaves
        # low_dim_feature_space as None.
        self.reduction_method = reduction_method
        self.low_dim_feature_space = None
        # NOTE(review): this dict is never populated by any method below;
        # the clustering methods return their metric dicts instead.
        self.extrinsic_metrics = {}
        
        self.initialize_visualizer()
        self.data_loader = model.data_loader
        

    def initialize_visualizer(self):
        """Compute the 2-D embedding selected by self.reduction_method."""
        if self.reduction_method == 'tsne':
            tsne = TSNE(n_components = 2, random_state = 42)
            self.low_dim_feature_space = tsne.fit_transform(self.original_feature_space)
            print(f'TSNE reduced {self.model_name} feature space shape :', self.low_dim_feature_space.shape)
            
        if self.reduction_method == 'mds':
            # NOTE(review): no random_state here, so MDS output is not
            # reproducible across runs — confirm whether that is intended.
            mds = MDS(n_components=2)
            self.low_dim_feature_space = mds.fit_transform(self.original_feature_space)

        if self.reduction_method == 'umap':
            self.low_dim_feature_space = umap.UMAP().fit_transform(self.original_feature_space)
            print(f'UMAP reduced {self.model_name} feature space shape :', self.low_dim_feature_space.shape)

    
    def visualize(self):
        """Scatter-plot the 2-D embedding colored by ground-truth labels,
        save it as '<model_name>.png' and show it.

        NOTE(review): the title/axis labels always say "t-SNE" even when
        reduction_method is 'mds' or 'umap'.
        """
        # Re-collect ground-truth labels by iterating the dataset; order
        # matches the feature rows because the dataset is unshuffled.
        labels = []
        for images, label_batch in self.data:
            labels.extend(label_batch.numpy())

        labels = np.array(labels)
        # Create a scatter plot
        plt.figure(figsize=(8, 6))
        scatter = plt.scatter(self.low_dim_feature_space[:, 0], self.low_dim_feature_space[:, 1], c=labels, cmap='viridis')       
        class_names = self.class_names
        # Create a dictionary to map class labels to unique colors
        unique_labels = np.unique(labels)
        colors = plt.cm.viridis(np.linspace(0, 1, len(unique_labels)))
        class_color_mapping = {label: color for label, color in zip(unique_labels, colors)}       
        # Add legend with custom class names and colors
        legend_elements = [plt.Line2D([0], [0], marker='o', color='w', label=class_names[label], markerfacecolor=color, markersize=10) for label, color in class_color_mapping.items()]
        plt.legend(handles=legend_elements, loc='upper right')   
        plt.title(f't-SNE Visualization of {self.model_name} features')
        plt.xlabel(f'{self.model_name} t-SNE Dimension 1')
        plt.ylabel(f'{self.model_name} t-SNE Dimension 2')
        plt.savefig(f'{self.model_name}.png', dpi=300)
        plt.show()
    
    def spectral_clustering(self):
        """Run spectral clustering on the 2-D embedding (k = number of
        ground-truth classes) and plot the resulting assignment.

        NOTE(review): unlike kmeans/hdbscan below, this clusters the
        low-dimensional embedding and reports no supervised metrics.
        """
        n_clusters = len(self.class_names)
        spectral_clustering = SpectralClustering(n_clusters=n_clusters, random_state=42)
        spectral_cluster_labels = spectral_clustering.fit_predict(self.low_dim_feature_space)
        plt.figure(figsize=(8, 6))
        plt.scatter(self.low_dim_feature_space[:, 0], self.low_dim_feature_space[:, 1], c=spectral_cluster_labels, cmap='viridis', s=50)
        plt.title('Spectral Clustering')
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        plt.show()

    
    def supervised_metrics(self,true_labels, cluster_labels, title):
        """Score a clustering against the ground truth with ARI, NMI,
        Fowlkes-Mallows and V-measure; print each and return them in a
        dict keyed '<title>ari'/'<title>nmi'/'<title>fm'/'<title>vm'."""
        ari = adjusted_rand_score(true_labels, cluster_labels)
        nmi = normalized_mutual_info_score(true_labels, cluster_labels)
        fm = fowlkes_mallows_score(true_labels, cluster_labels)
        # homogeneity_completeness_v_measure returns a 3-tuple; index [2]
        # below is the V-measure.
        vm = homogeneity_completeness_v_measure(true_labels, cluster_labels)
        
        print(f"{title} Adjusted Rand Index (ARI -1-1): {ari:.2f}")
        print(f"{title} Normalized Mutual Information (NMI 0-1): {nmi:.2f}")
        print(f"{title} Fowlkes-Mallows Score (0-1): {fm:.2f}")
        print(f"{title} Vmeasure (0-1): {vm[2]:.2f}")

        extrinsic_metrics = {}
        extrinsic_metrics[f'{title}ari'] = ari
        extrinsic_metrics[f'{title}nmi'] = nmi
        extrinsic_metrics[f'{title}fm'] = fm
        extrinsic_metrics[f'{title}vm'] = vm[2]

        return extrinsic_metrics
        

    def kmeans_clustering(self):
        """K-means on the ORIGINAL high-dimensional features (k = number of
        classes); the assignment is plotted on the 2-D embedding and scored
        against ground truth. Returns the supervised-metric dict."""
        n_clusters = len(self.class_names)
        kmeans_clustering = KMeans(n_clusters=n_clusters, random_state=42)
        kmeans_clustering.fit_predict(self.original_feature_space)
        kmeans_metrics = self.supervised_metrics(self.data_loader.labels, kmeans_clustering.labels_, 'KMeans')
        plt.figure(figsize=(8, 6))
        plt.scatter(self.low_dim_feature_space[:, 0], self.low_dim_feature_space[:, 1], c=kmeans_clustering.labels_, cmap='viridis', s=50)
        plt.title('KMeans Clustering')
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        plt.show()
        return kmeans_metrics



    
    def hdbscan_clustering(self):
        """HDBSCAN on the ORIGINAL high-dimensional features; the assignment
        is plotted on the 2-D embedding and scored against ground truth.
        Returns the supervised-metric dict. Note HDBSCAN may label points
        as noise (-1), which feeds into the supervised scores as-is."""

        HDB =  hdbscan.HDBSCAN(min_cluster_size=5)
        HDB.fit(self.original_feature_space)
        extrinsic_metrics = self.supervised_metrics(self.data_loader.labels, HDB.labels_, 'HDBSCAN')
        
        
        plt.figure(figsize=(8, 6))

        # Scatter plot each data point with a color corresponding to its cluster
        plt.scatter(self.low_dim_feature_space[:, 0], self.low_dim_feature_space[:, 1], c=HDB.labels_, cmap='viridis', s=50)
        
        plt.title('HDBSCAN Clustering')
        plt.xlabel('Feature 1')
        plt.ylabel('Feature 2')
        plt.show()

        return extrinsic_metrics
         
    def visualize_bokeh(self, save=False):
        """Build and return an interactive Bokeh scatter of the embedding
        with image-preview tooltips and a per-class legend.

        NOTE(review): the `save` parameter is currently unused (and shadows
        bokeh.plotting.save imported above) — confirm intended behavior.
        NOTE(review): Category10[num_classes] only supports 3-10 classes;
        more than 10 classes would raise a KeyError here.
        """
        title = f"TSNE visualization for {self.model_name}" 
        data = self.low_dim_feature_space
        data_loader = self.data_loader
        
        #Extract class labels:
        class_labels = data_loader.labels
        class_names = data_loader.class_labels()
    
        # Create image paths for bokeh; file:/// scheme lets the browser
        # load local files into the tooltip <img> tag.
        image_paths = [str(pathlib.Path.cwd() / image_path) for image_path in data_loader.data.file_paths]
        image_paths_with_file_scheme = ['file:///' + path for path in image_paths]

        #Store file names
        file_names = [str(os.path.split(path)[1]) for path in image_paths]

    
        # Define a color palette based on the number of unique class labels
        unique_labels = list(set(class_labels))
        num_classes = len(unique_labels)
        if num_classes<3:
            # Category10 has no entry below 3 colors, so fall back to a
            # fixed two-color palette.
            colors = ['#1f77b4', '#ff7f0e']
        else:
            colors = Category10[num_classes]  # You can choose a different palette if needed
    
        # Map class labels to colors
        color_mapping = {label: colors[i] for i, label in enumerate(unique_labels)}
        point_colors = [color_mapping[label] for label in class_labels]
    
        # Map numerical labels to class names
        class_names_mapping = {i: class_name for i, class_name in enumerate(class_names)}
        label_names = [class_names_mapping[label] for label in class_labels]
    
        # Create a Bokeh ColumnDataSource with image data
        source = ColumnDataSource(data=dict(
            x=data[:, 0],
            y=data[:, 1],
            imgs=image_paths_with_file_scheme,  # Store image filenames for tooltips
            labels=label_names,
            fnames = file_names,
            colors=point_colors,  # Store point colors
        ))
    
        # Create a new Bokeh figure for the scatter plot
        p = figure(title=title, toolbar_location='right', tools="pan,box_zoom,reset,wheel_zoom")

         # <div>
         #    <span style="font-size: 10px; font-weight: bold;">File:@imgs</span>
         # </div>
        
        # Define the tooltip template
        tooltip_template = """
            <div>
                <div>
                    <span style="font-size: 14px; font-weight: bold;">Label: </span>
                    <span style="font-size: 14px;">@labels</span>
                </div>
               
                 <div>
            <span style="font-size: 10px; font-weight: bold;">File:@imgs</span>
         </div>
                <div>
                    <img src="@imgs" alt="" width="200" height="200">
                </div>
            </div>
            
        """

    
        # Add tooltips using the template
        hover = HoverTool(tooltips=tooltip_template)
    
        # Add the hover tool to the plot
        p.add_tools(hover)
    
        # Create a legend and legend items; one renderer per class so each
        # class can be toggled/labelled independently.
        legend_items = []
        for class_label, class_color in color_mapping.items():
            class_indices = [i for i, label in enumerate(class_labels) if label == class_label]
            class_source = ColumnDataSource(data=dict(
                x=[data[i, 0] for i in class_indices],
                y=[data[i, 1] for i in class_indices],
                imgs=[image_paths_with_file_scheme[i] for i in class_indices],
                labels=[label_names[i] for i in class_indices],
                colors=[class_color] * len(class_indices)
            ))
            scatter = p.scatter('x', 'y', source=class_source, size=8, color='colors', alpha=0.5, legend_label=class_names_mapping[class_label])
            legend_items.append(LegendItem(label=class_names_mapping[class_label], renderers=[scatter]))
    
        legend = Legend(items=legend_items)
        p.add_layout(legend)

      
        return p


from sklearn.metrics import silhouette_score, davies_bouldin_score, calinski_harabasz_score
from s_dbw import S_Dbw
from sklearn.neighbors import LocalOutlierFactor
import hdbscan

class Metrics:
    """Computes intrinsic clustering-quality metrics (silhouette, Davies-
    Bouldin, Calinski-Harabasz, S_Dbw) plus an LOF-based outlier count for
    a feature space labelled by ground truth. Metrics printed with '*' are
    better when lower. Metrics are computed (and printed) immediately on
    construction and stored in self.metrics."""

    def __init__(self, feature_space, data_loader, decimal_places=3):
        self.feature_space = feature_space
        self.labels = data_loader.labels
        self.decimal_places = decimal_places  # precision used when printing floats
        self.metrics = None
        # Compute and print the metrics right away.
        self.calculate()

    def calculate(self):
        """Compute the metric dictionary, print it, store it on
        self.metrics and return it.

        Returns:
            dict with keys 'silhouette', 'DBI', 'CH', 'sdbw', 'outliers'.
        """
        features = self.feature_space
        labels = self.labels

        # Outlier count via Local Outlier Factor; sqrt(n) neighbours is a
        # common rule-of-thumb default.
        LOF = LocalOutlierFactor(n_neighbors=int(np.sqrt(len(labels))))
        outliers = LOF.fit_predict(features)
        # FIX: count as a plain int (it was previously printed through a
        # float format, e.g. "# of outliers : 21.000").
        num_outliers = int(np.count_nonzero(outliers == -1))

        # Intrinsic metrics computed against the ground-truth labels.
        silhouette = silhouette_score(features, labels)
        davies_bouldin_index = davies_bouldin_score(features, labels)
        calinski_harabasz_index = calinski_harabasz_score(features, labels)
        s_dbw = S_Dbw(features, labels, centers_id=None, method='Tong', alg_noise='bind', centr='mean', nearest_centr=True, metric='euclidean')

        metrics = {'silhouette': silhouette, 'DBI': davies_bouldin_index, 'CH': calinski_harabasz_index, 'sdbw': s_dbw, 'outliers': num_outliers}

        print('# of outliers : {}'.format(metrics['outliers']))
        print('Silhouette score : {:.{dp}f}'.format(metrics['silhouette'], dp=self.decimal_places))
        print('Davies Bouldin Index *: {:.{dp}f}'.format(metrics['DBI'], dp=self.decimal_places))
        print('Calinski Harabasz Index: {:.{dp}f}'.format(metrics['CH'], dp=self.decimal_places))
        print('S_Dbw *: {:.{dp}f}'.format(metrics['sdbw'], dp=self.decimal_places))

        self.metrics = metrics
        return metrics

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
)
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
from keras.layers import Dropout
from keras.optimizers import Adam


class ModelEvaluator:
    """Scores the quality of an extracted feature space by training
    classical classifiers (SVM, k-NN, random forest) and a small dense
    neural network on top of it.

    verbose=True prints accuracy/precision/recall/F1 plus the confusion
    matrix per classifier; verbose=False prints only the weighted F1.
    """

    def __init__(self, model, data_loader, feature_visualizer, test_size=0.30, verbose=False):
        self.verbose = verbose
        self.model_name = model.model_name
        self.features = model.features
        self.labels = data_loader.labels
        # 2-D embedding, used by evaluate_low_dim_with_cross_validation().
        self.low_dim_features = feature_visualizer.low_dim_feature_space
        # Fixed random_state keeps the train/test split reproducible.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.features, self.labels, test_size=test_size, random_state=42
        )

    def evaluate_svm(self):
        """Train a linear SVM on the train split and report test metrics."""
        svm_classifier = SVC(kernel='linear', C=1.0)
        svm_classifier.fit(self.X_train, self.y_train)
        svm_predictions = svm_classifier.predict(self.X_test)
        if self.verbose:
            self.print_metrics("SVM Classifier Metrics:", svm_predictions)
        else:
            self.print_short_metrics("SVM Classifier", svm_predictions)

    def evaluate_knn(self):
        """Train a k-NN classifier (k = sqrt(n_train), a common heuristic)
        and report test metrics."""
        n_neighbors = int(np.sqrt(len(self.X_train)))
        knn_classifier = KNeighborsClassifier(n_neighbors=n_neighbors)
        knn_classifier.fit(self.X_train, self.y_train)
        knn_predictions = knn_classifier.predict(self.X_test)
        if self.verbose:
            self.print_metrics("k-NN Classifier Metrics:", knn_predictions)
        else:
            self.print_short_metrics("k-NN Classifier", knn_predictions)

    def build_nn(self):
        """Build and compile a fresh dense multi-class classifier whose
        input width matches the feature dimensionality."""
        optimizer = Adam(learning_rate=0.001)

        num_classes = len(np.unique(self.labels))
        print('num classes :', num_classes)  # FIX: message typo ("num claasees")

        # Funnel-shaped dense stack: 512 -> 256 -> 128 -> 64 -> 32 -> softmax.
        model = Sequential()
        model.add(Dense(512, input_dim=self.X_train.shape[1], activation='relu'))
        model.add(Dense(256, activation='relu'))
        model.add(Dense(128, activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(32, activation='relu'))
        model.add(Dense(num_classes, activation='softmax'))  # 'softmax' for multi-class

        # Compile the model
        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

        return model

    def evaluate_nn(self):
        """Train the dense network on the train split, report test metrics
        and return the fitted model."""
        y_train_categorical = to_categorical(self.y_train)
        y_test_categorical = to_categorical(self.y_test)  # kept for parity; unused below
        model = self.build_nn()

        # Train the model
        model.fit(self.X_train, y_train_categorical, epochs=30, batch_size=32, verbose=1)

        # argmax converts softmax rows back to integer class labels.
        nn_pred_onehot = model.predict(self.X_test)
        nn_pred = np.argmax(nn_pred_onehot, axis=1)

        if self.verbose:
            self.print_metrics("Neural Network Classifier Metrics:", nn_pred)
        else:
            self.print_short_metrics("Neural Network Classifier", nn_pred)

        return model

    def evaluate_random_forest(self, n_estimators=100):
        """Train a random forest and report test metrics."""
        rf_classifier = RandomForestClassifier(n_estimators=n_estimators, random_state=42)
        rf_classifier.fit(self.X_train, self.y_train)
        rf_predictions = rf_classifier.predict(self.X_test)
        if self.verbose:
            self.print_metrics("Random Forest Classifier Metrics:", rf_predictions)
        else:
            self.print_short_metrics("Random Forest Classifier", rf_predictions)

    def print_metrics(self, title, predictions):
        """Print the full weighted metric set plus the confusion matrix."""
        accuracy = accuracy_score(self.y_test, predictions)
        precision = precision_score(self.y_test, predictions, average='weighted')
        recall = recall_score(self.y_test, predictions, average='weighted')
        f1 = f1_score(self.y_test, predictions, average='weighted')
        confusion = confusion_matrix(self.y_test, predictions)

        print(title)
        print(f"Accuracy: {accuracy*100:.2f}%")
        print(f"Precision: {precision*100:.2f}%")
        print(f"Recall: {recall*100:.2f}%")
        print(f"F1 Score: {f1*100:.2f}%")
        print("Confusion Matrix:\n", confusion,'\n')

    def print_short_metrics(self, title, predictions):
        """Print only the weighted F1 score."""
        f1 = f1_score(self.y_test, predictions, average='weighted')
        print(f"{title} F1 Score: {f1*100:.2f}%")

    def evaluate(self):
        """Run every classifier once on the fixed train/test split."""
        print(f'-----------------------{self.model_name}----------------------------')

        self.evaluate_nn()
        # Evaluate SVM Classifier
        self.evaluate_svm()

        # Evaluate k-NN Classifier
        self.evaluate_knn()

        # Evaluate Random Forest Classifier
        self.evaluate_random_forest()

    def _cross_validate_classical(self, features, cv):
        """Cross-validate SVM, k-NN and random forest on `features`,
        printing each mean accuracy. Shared by the full-dimensional and
        low-dimensional evaluation paths (previously duplicated code).

        Returns:
            dict of mean accuracies in percent, keyed 'rf', 'knn', 'svm'.
        """
        # Evaluate SVM Classifier with cross-validation
        svm_classifier = SVC(kernel='linear', C=1.0)
        svm_scores = cross_val_score(svm_classifier, features, self.labels, cv=cv, scoring='accuracy')
        print("SVM Classifier Cross-Validation Accuracy: {:.2f}%".format(np.mean(svm_scores) * 100))

        # Evaluate k-NN Classifier with cross-validation
        knn_classifier = KNeighborsClassifier(n_neighbors=int(np.sqrt(len(self.X_train))))
        knn_scores = cross_val_score(knn_classifier, features, self.labels, cv=cv, scoring='accuracy')
        print("k-NN Classifier Cross-Validation Accuracy: {:.2f}%".format(np.mean(knn_scores) * 100))

        # Evaluate Random Forest Classifier with cross-validation
        rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
        rf_scores = cross_val_score(rf_classifier, features, self.labels, cv=cv, scoring='accuracy')
        print("Random Forest Classifier Cross-Validation Accuracy: {:.2f}%".format(np.mean(rf_scores) * 100))

        return {'rf': np.mean(rf_scores) * 100,
                'knn': np.mean(knn_scores) * 100,
                'svm': np.mean(svm_scores) * 100}

    def evaluate_with_cross_validation(self, n_splits=10):
        """Stratified k-fold cross-validation of all four classifiers on
        the full-dimensional features.

        Returns:
            dict of mean accuracies (percent) keyed 'rf', 'knn', 'svm', 'nn'.
        """
        # Initialize cross-validation
        cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        ml_performances = self._cross_validate_classical(self.features, cv)

        # The keras model cannot go through cross_val_score directly, so
        # the folds are iterated manually with a fresh network per fold.
        nn_scores = []
        for train_index, val_index in cv.split(self.features, self.labels):
            X = self.features
            y = self.labels
            X_train, X_val = X[train_index], X[val_index]
            y_train, y_val = y[train_index], y[val_index]

            # Define and compile a fresh neural network model
            model = self.build_nn()

            # Convert labels to one-hot encoding
            y_train_categorical = to_categorical(y_train)
            y_val_categorical = to_categorical(y_val)  # kept for parity; unused below

            # Fewer epochs than evaluate_nn() (10 vs 30) to keep CV tractable.
            model.fit(X_train, y_train_categorical, epochs=10, batch_size=32, verbose=0)

            # Make predictions on the validation set
            y_val_pred = model.predict(X_val)
            y_val_pred_classes = np.argmax(y_val_pred, axis=1)

            # Calculate accuracy and store it in the list
            accuracy = accuracy_score(y_val, y_val_pred_classes)
            nn_scores.append(accuracy)

        print("Neural network Classifier Cross-Validation Accuracy: {:.2f}%".format(np.mean(nn_scores) * 100))

        ml_performances['nn'] = np.mean(nn_scores) * 100
        return ml_performances

    def evaluate_low_dim_with_cross_validation(self, n_splits=10):
        """Same classical cross-validation but on the 2-D embedding.

        Returns:
            dict of mean accuracies (percent) keyed 'rf', 'knn', 'svm'
            (added for consistency with evaluate_with_cross_validation;
            previously this method returned None).
        """
        cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        return self._cross_validate_classical(self.low_dim_features, cv)
In [3]:
# print(f'\n\n -------------------For dataset{dataset.dataset_path}-------------------- \n\n\n\n') 
# resnet = CustomModel(model_name='resnet50', data_loader = dataset)
# resnet.extract_features()
# resnet_tsne = FeatureSpaceVisualizer(resnet,reduction_method='tsne' )
# resnet_tsne.visualize()
# extrinsic_metrics = resnet_tsne.kmeans_clustering()
# print(extrinsic_metrics)
# resnet_metrics = Metrics(feature_space = resnet.features, data_loader = dataset)

# # #Evaluate the dataset
# # resnet_evaluator = ModelEvaluator(resnet, dataset, resnet_tsne, test_size = 0.3)
# # ml_scores = resnet_evaluator.evaluate_with_cross_validation()

# # # Populating results fro this dataset
# # datasets[dataset].append(resnet_metrics.metrics)
# # datasets[dataset]. append(resnet_tsne.extrinsic_metrics)
# # datasets[dataset]. append(ml_scores)
# # datasets[dataset]. append(extrinsic_metrics)

Setting up pipeline¶

Importing datasets¶

In [157]:
# Loading the datasets — one DataLoader per image directory.
# All datasets live under a single root; hardcoding machine-specific absolute
# paths in each call is fragile, so the root is factored out, and the first
# path's mixed "/" and "\" separators (a latent portability bug) are fixed.
DATASETS_ROOT = "C:/Users/hdn7rng/Desktop/Experiments/datasets"

mattendichtung_loader = DataLoader(f"{DATASETS_ROOT}/Mattendichtung")
asqmm_loader = DataLoader(f"{DATASETS_ROOT}/ASQMM_CUSTOM")
peg_loader = DataLoader(f"{DATASETS_ROOT}/PEG")
nut_loader = DataLoader(f"{DATASETS_ROOT}/metal_nutold")
screw_loader = DataLoader(f"{DATASETS_ROOT}/screw")
bottle_loader = DataLoader(f"{DATASETS_ROOT}/bottle")
cable_loader = DataLoader(f"{DATASETS_ROOT}/cable")
capsule_loader = DataLoader(f"{DATASETS_ROOT}/capsule")

# flowers_loader = DataLoader(f"{DATASETS_ROOT}/organic/flowers")
catdog_loader = DataLoader(f"{DATASETS_ROOT}/organic/catdog")
mnist_loader = DataLoader(f"{DATASETS_ROOT}/organic/mnist")
Found 5600 files belonging to 4 classes.
Found 2916 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.
Found 358 files belonging to 2 classes.
Found 480 files belonging to 6 classes.
Found 292 files belonging to 4 classes.
Found 374 files belonging to 9 classes.
Found 351 files belonging to 6 classes.
Found 2000 files belonging to 2 classes.
Found 600 files belonging to 10 classes.
In [158]:
# Add datasets to the pipeline.
# Each DataLoader maps to a list that the pipeline loop fills with its
# results (intrinsic metrics, clustering metrics, classifier CV scores).
datasets = {}
datasets.update({ catdog_loader:[], asqmm_loader : [],  peg_loader : [], mattendichtung_loader : [], nut_loader : [], screw_loader:[], bottle_loader : [], capsule_loader:[], cable_loader:[], mnist_loader:[]})
# Separate registry, presumably for an EfficientNet run of the same
# pipeline — TODO confirm; it is not populated in the cells shown here.
efficientnet_datasets = {}
efficientnet_datasets.update({ catdog_loader:[], asqmm_loader : [],  peg_loader : [], mattendichtung_loader : [], nut_loader : [], screw_loader:[], bottle_loader : [], capsule_loader:[], cable_loader:[], mnist_loader:[]})

# datasets.update({  mnist_loader : []})
# 
In [154]:
len(datasets)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[154], line 1
----> 1 datasets[0]

KeyError: 0
In [73]:
%%time
run = 1  # run counter; currently unused in the loop below
# For each dataset: extract ResNet50 features, visualize the t-SNE
# embedding, run k-means and HDBSCAN, compute intrinsic metrics, and
# cross-validate classifiers on the features. Results accumulate in the
# per-dataset list inside `datasets`.
for dataset in datasets:
    print(f'\n\n -------------------For dataset{dataset.dataset_path}-------------------- \n\n\n\n') 
    resnet = CustomModel(model_name='resnet50', data_loader = dataset)
    resnet.extract_features()
    resnet_tsne = FeatureSpaceVisualizer(resnet,reduction_method='tsne' )
    resnet_tsne.visualize()
    kmeans_metrics = resnet_tsne.kmeans_clustering()
    hdb_metrics = resnet_tsne.hdbscan_clustering()
    resnet_metrics = Metrics(feature_space = resnet.features, data_loader = dataset)

    #Evaluate the dataset
    resnet_evaluator = ModelEvaluator(resnet, dataset, resnet_tsne, test_size = 0.3)
    ml_scores = resnet_evaluator.evaluate_with_cross_validation()
    
    # Populating results for this dataset, in a fixed order:
    # [intrinsic metrics, k-means metrics, CV scores, HDBSCAN metrics]
    datasets[dataset].append(resnet_metrics.metrics)
    datasets[dataset]. append(kmeans_metrics)
    datasets[dataset]. append(ml_scores)
    datasets[dataset]. append(hdb_metrics)
    
    

 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\organic\catdog-------------------- 




63/63 [==============================] - 5s 73ms/step
resnet50 extracted feature space size : (2000, 2048)
TSNE reduced resnet50 feature space shape : (2000, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
KMeans Adjusted Rand Index (ARI -1-1): 0.89
KMeans Normalized Mutual Information (NMI 0-1): 0.83
KMeans Fowlkes-Mallows Score (0-1): 0.94
KMeans Vmeasure (0-1): 0.83
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.10
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.24
HDBSCAN Fowlkes-Mallows Score (0-1): 0.60
HDBSCAN Vmeasure (0-1): 0.24
No description has been provided for this image
# of outliers : 21.000
Silhouette score : 0.090
Davies Bouldin Index *: 3.154
Calinski Harabasz Index: 192.674
S_Dbw *: 0.954
SVM Classifier Cross-Validation Accuracy: 98.85%
k-NN Classifier Cross-Validation Accuracy: 98.20%
Random Forest Classifier Cross-Validation Accuracy: 98.15%
num claasees : 2
7/7 [==============================] - 0s 3ms/step
num claasees : 2
7/7 [==============================] - 0s 3ms/step
num claasees : 2
7/7 [==============================] - 0s 3ms/step
num claasees : 2
7/7 [==============================] - 0s 3ms/step
num claasees : 2
7/7 [==============================] - 0s 0s/step
num claasees : 2
7/7 [==============================] - 0s 3ms/step
num claasees : 2
7/7 [==============================] - 0s 4ms/step
num claasees : 2
7/7 [==============================] - 0s 0s/step
num claasees : 2
7/7 [==============================] - 0s 0s/step
num claasees : 2
7/7 [==============================] - 0s 1ms/step
Neural network Classifier Cross-Validation Accuracy: 98.85%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\ASQMM_CUSTOM-------------------- 




92/92 [==============================] - 7s 71ms/step
resnet50 extracted feature space size : (2916, 2048)
TSNE reduced resnet50 feature space shape : (2916, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
KMeans Adjusted Rand Index (ARI -1-1): 0.87
KMeans Normalized Mutual Information (NMI 0-1): 0.75
KMeans Fowlkes-Mallows Score (0-1): 0.96
KMeans Vmeasure (0-1): 0.75
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.90
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.77
HDBSCAN Fowlkes-Mallows Score (0-1): 0.96
HDBSCAN Vmeasure (0-1): 0.77
No description has been provided for this image
# of outliers : 25.000
Silhouette score : 0.402
Davies Bouldin Index *: 1.114
Calinski Harabasz Index: 1594.104
S_Dbw *: 0.893
SVM Classifier Cross-Validation Accuracy: 99.42%
k-NN Classifier Cross-Validation Accuracy: 97.02%
Random Forest Classifier Cross-Validation Accuracy: 98.56%
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 3ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
Neural network Classifier Cross-Validation Accuracy: 98.83%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\PEG-------------------- 




32/32 [==============================] - 3s 74ms/step
resnet50 extracted feature space size : (1000, 2048)
TSNE reduced resnet50 feature space shape : (1000, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=4.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): 0.03
KMeans Normalized Mutual Information (NMI 0-1): 0.02
KMeans Fowlkes-Mallows Score (0-1): 0.53
KMeans Vmeasure (0-1): 0.02
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.16
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.16
HDBSCAN Fowlkes-Mallows Score (0-1): 0.57
HDBSCAN Vmeasure (0-1): 0.16
No description has been provided for this image
# of outliers : 0.000
Silhouette score : 0.053
Davies Bouldin Index *: 4.216
Calinski Harabasz Index: 53.845
S_Dbw *: 0.974
SVM Classifier Cross-Validation Accuracy: 97.40%
k-NN Classifier Cross-Validation Accuracy: 92.60%
Random Forest Classifier Cross-Validation Accuracy: 95.30%
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
Neural network Classifier Cross-Validation Accuracy: 92.80%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\Mattendichtung-------------------- 




175/175 [==============================] - 13s 68ms/step
resnet50 extracted feature space size : (5600, 2048)
TSNE reduced resnet50 feature space shape : (5600, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
KMeans Adjusted Rand Index (ARI -1-1): 0.98
KMeans Normalized Mutual Information (NMI 0-1): 0.97
KMeans Fowlkes-Mallows Score (0-1): 0.99
KMeans Vmeasure (0-1): 0.97
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.94
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.92
HDBSCAN Fowlkes-Mallows Score (0-1): 0.96
HDBSCAN Vmeasure (0-1): 0.92
No description has been provided for this image
# of outliers : 43.000
Silhouette score : 0.317
Davies Bouldin Index *: 1.402
Calinski Harabasz Index: 1910.041
S_Dbw *: 0.700
SVM Classifier Cross-Validation Accuracy: 99.46%
k-NN Classifier Cross-Validation Accuracy: 99.43%
Random Forest Classifier Cross-Validation Accuracy: 99.61%
num claasees : 4
18/18 [==============================] - 0s 916us/step
num claasees : 4
18/18 [==============================] - 0s 3ms/step
num claasees : 4
18/18 [==============================] - 0s 2ms/step
num claasees : 4
18/18 [==============================] - 0s 1ms/step
num claasees : 4
18/18 [==============================] - 0s 2ms/step
num claasees : 4
18/18 [==============================] - 0s 1ms/step
num claasees : 4
18/18 [==============================] - 0s 924us/step
num claasees : 4
18/18 [==============================] - 0s 2ms/step
num claasees : 4
18/18 [==============================] - 0s 896us/step
num claasees : 4
18/18 [==============================] - 0s 3ms/step
Neural network Classifier Cross-Validation Accuracy: 99.50%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\metal_nutold-------------------- 




12/12 [==============================] - 2s 97ms/step
resnet50 extracted feature space size : (358, 2048)
TSNE reduced resnet50 feature space shape : (358, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): 0.32
KMeans Normalized Mutual Information (NMI 0-1): 0.33
KMeans Fowlkes-Mallows Score (0-1): 0.77
KMeans Vmeasure (0-1): 0.33
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.32
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.33
HDBSCAN Fowlkes-Mallows Score (0-1): 0.77
HDBSCAN Vmeasure (0-1): 0.33
No description has been provided for this image
# of outliers : 0.000
Silhouette score : 0.191
Davies Bouldin Index *: 2.351
Calinski Harabasz Index: 61.988
S_Dbw *: 1.059
SVM Classifier Cross-Validation Accuracy: 93.03%
k-NN Classifier Cross-Validation Accuracy: 80.75%
Random Forest Classifier Cross-Validation Accuracy: 88.01%
num claasees : 2
2/2 [==============================] - 0s 3ms/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
num claasees : 2
2/2 [==============================] - 0s 16ms/step
num claasees : 2
2/2 [==============================] - 0s 2ms/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
Neural network Classifier Cross-Validation Accuracy: 87.71%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\screw-------------------- 




15/15 [==============================] - 2s 88ms/step
resnet50 extracted feature space size : (480, 2048)
TSNE reduced resnet50 feature space shape : (480, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): -0.01
KMeans Normalized Mutual Information (NMI 0-1): 0.02
KMeans Fowlkes-Mallows Score (0-1): 0.33
KMeans Vmeasure (0-1): 0.02
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.13
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.07
HDBSCAN Fowlkes-Mallows Score (0-1): 0.58
HDBSCAN Vmeasure (0-1): 0.07
No description has been provided for this image
# of outliers : 0.000
Silhouette score : -0.043
Davies Bouldin Index *: 7.523
Calinski Harabasz Index: 1.741
S_Dbw *: 0.993
SVM Classifier Cross-Validation Accuracy: 81.67%
k-NN Classifier Cross-Validation Accuracy: 75.21%
Random Forest Classifier Cross-Validation Accuracy: 76.04%
num claasees : 6
2/2 [==============================] - 0s 16ms/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 3ms/step
num claasees : 6
2/2 [==============================] - 0s 16ms/step
num claasees : 6
2/2 [==============================] - 0s 16ms/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
Neural network Classifier Cross-Validation Accuracy: 77.92%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\bottle-------------------- 




10/10 [==============================] - 2s 99ms/step
resnet50 extracted feature space size : (292, 2048)
TSNE reduced resnet50 feature space shape : (292, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): 0.46
KMeans Normalized Mutual Information (NMI 0-1): 0.43
KMeans Fowlkes-Mallows Score (0-1): 0.76
KMeans Vmeasure (0-1): 0.43
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.00
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.00
HDBSCAN Fowlkes-Mallows Score (0-1): 0.79
HDBSCAN Vmeasure (0-1): 0.00
No description has been provided for this image
# of outliers : 16.000
Silhouette score : 0.193
Davies Bouldin Index *: 2.347
Calinski Harabasz Index: 29.315
S_Dbw *: 1.170
SVM Classifier Cross-Validation Accuracy: 93.14%
k-NN Classifier Cross-Validation Accuracy: 87.66%
Random Forest Classifier Cross-Validation Accuracy: 92.80%
num claasees : 4
1/1 [==============================] - 0s 62ms/step
num claasees : 4
1/1 [==============================] - 0s 47ms/step
num claasees : 4
1/1 [==============================] - 0s 40ms/step
num claasees : 4
1/1 [==============================] - 0s 58ms/step
num claasees : 4
1/1 [==============================] - 0s 47ms/step
num claasees : 4
1/1 [==============================] - 0s 47ms/step
num claasees : 4
1/1 [==============================] - 0s 47ms/step
num claasees : 4
1/1 [==============================] - 0s 45ms/step
num claasees : 4
1/1 [==============================] - 0s 53ms/step
num claasees : 4
1/1 [==============================] - 0s 51ms/step
Neural network Classifier Cross-Validation Accuracy: 92.11%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\capsule-------------------- 




11/11 [==============================] - 2s 109ms/step
resnet50 extracted feature space size : (351, 2048)
TSNE reduced resnet50 feature space shape : (351, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): 0.07
KMeans Normalized Mutual Information (NMI 0-1): 0.11
KMeans Fowlkes-Mallows Score (0-1): 0.37
KMeans Vmeasure (0-1): 0.11
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.09
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.12
HDBSCAN Fowlkes-Mallows Score (0-1): 0.52
HDBSCAN Vmeasure (0-1): 0.12
No description has been provided for this image
# of outliers : 19.000
Silhouette score : -0.023
Davies Bouldin Index *: 4.687
Calinski Harabasz Index: 5.242
S_Dbw *: 1.080
SVM Classifier Cross-Validation Accuracy: 81.77%
k-NN Classifier Cross-Validation Accuracy: 70.37%
Random Forest Classifier Cross-Validation Accuracy: 74.37%
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 16ms/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 16ms/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
Neural network Classifier Cross-Validation Accuracy: 71.80%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\cable-------------------- 




12/12 [==============================] - 2s 98ms/step
resnet50 extracted feature space size : (374, 2048)
TSNE reduced resnet50 feature space shape : (374, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): 0.05
KMeans Normalized Mutual Information (NMI 0-1): 0.19
KMeans Fowlkes-Mallows Score (0-1): 0.33
KMeans Vmeasure (0-1): 0.19
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): -0.18
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.06
HDBSCAN Fowlkes-Mallows Score (0-1): 0.56
HDBSCAN Vmeasure (0-1): 0.06
No description has been provided for this image
# of outliers : 4.000
Silhouette score : -0.022
Davies Bouldin Index *: 3.799
Calinski Harabasz Index: 5.990
S_Dbw *: 0.988
SVM Classifier Cross-Validation Accuracy: 87.40%
k-NN Classifier Cross-Validation Accuracy: 77.80%
Random Forest Classifier Cross-Validation Accuracy: 80.47%
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
Neural network Classifier Cross-Validation Accuracy: 82.86%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\organic\mnist-------------------- 




19/19 [==============================] - 2s 78ms/step
resnet50 extracted feature space size : (600, 2048)
TSNE reduced resnet50 feature space shape : (600, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=3.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): 0.32
KMeans Normalized Mutual Information (NMI 0-1): 0.48
KMeans Fowlkes-Mallows Score (0-1): 0.39
KMeans Vmeasure (0-1): 0.48
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.06
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.17
HDBSCAN Fowlkes-Mallows Score (0-1): 0.27
HDBSCAN Vmeasure (0-1): 0.17
No description has been provided for this image
# of outliers : 0.000
Silhouette score : 0.065
Davies Bouldin Index *: 3.213
Calinski Harabasz Index: 30.452
S_Dbw *: 0.826
SVM Classifier Cross-Validation Accuracy: 92.17%
k-NN Classifier Cross-Validation Accuracy: 83.33%
Random Forest Classifier Cross-Validation Accuracy: 86.83%
num claasees : 10
2/2 [==============================] - 0s 6ms/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 16ms/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 16ms/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
Neural network Classifier Cross-Validation Accuracy: 87.33%
CPU times: total: 32min 47s
Wall time: 17min 48s
In [159]:
%%time
run = 12

# For each dataset
for dataset in datasets:
    print(f'\n\n -------------------For dataset{dataset.dataset_path}-------------------- \n\n\n\n') 
    efficientnet = CustomModel(model_name='efficientnet', data_loader = dataset)
    efficientnet.extract_features()
    efficientnet_tsne = FeatureSpaceVisualizer(efficientnet,reduction_method='tsne' )
    efficientnet_tsne.visualize()
    kmeans_metrics = efficientnet_tsne.kmeans_clustering()
    hdb_metrics = efficientnet_tsne.hdbscan_clustering()
    efficientnet_metrics = Metrics(feature_space = efficientnet.features, data_loader = dataset)

    #Evaluate the dataset
    efficientnet_evaluator = ModelEvaluator(efficientnet, dataset, efficientnet_tsne, test_size = 0.3)
    ml_scores = efficientnet_evaluator.evaluate_with_cross_validation()
    
    # Populating results fro this dataset
    efficientnet_datasets[dataset].append(efficientnet_metrics.metrics)
    efficientnet_datasets[dataset]. append(kmeans_metrics)
    efficientnet_datasets[dataset]. append(ml_scores)
    efficientnet_datasets[dataset]. append(hdb_metrics)
    

 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\organic\catdog-------------------- 




63/63 [==============================] - 4s 56ms/step
efficientnet extracted feature space size : (2000, 1280)
TSNE reduced efficientnet feature space shape : (2000, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
KMeans Adjusted Rand Index (ARI -1-1): 0.97
KMeans Normalized Mutual Information (NMI 0-1): 0.94
KMeans Fowlkes-Mallows Score (0-1): 0.99
KMeans Vmeasure (0-1): 0.94
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.32
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.44
HDBSCAN Fowlkes-Mallows Score (0-1): 0.61
HDBSCAN Vmeasure (0-1): 0.44
No description has been provided for this image
# of outliers : 2.000
Silhouette score : 0.108
Davies Bouldin Index *: 2.829
Calinski Harabasz Index: 244.216
S_Dbw *: 0.943
SVM Classifier Cross-Validation Accuracy: 99.05%
k-NN Classifier Cross-Validation Accuracy: 99.30%
Random Forest Classifier Cross-Validation Accuracy: 99.20%
num claasees : 2
7/7 [==============================] - 0s 3ms/step
num claasees : 2
7/7 [==============================] - 0s 3ms/step
num claasees : 2
7/7 [==============================] - 0s 0s/step
num claasees : 2
7/7 [==============================] - 0s 0s/step
num claasees : 2
7/7 [==============================] - 0s 3ms/step
num claasees : 2
7/7 [==============================] - 0s 0s/step
num claasees : 2
7/7 [==============================] - 0s 3ms/step
num claasees : 2
7/7 [==============================] - 0s 0s/step
num claasees : 2
7/7 [==============================] - 0s 3ms/step
num claasees : 2
7/7 [==============================] - 0s 3ms/step
Neural network Classifier Cross-Validation Accuracy: 99.05%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\ASQMM_CUSTOM-------------------- 




92/92 [==============================] - 6s 54ms/step
efficientnet extracted feature space size : (2916, 1280)
TSNE reduced efficientnet feature space shape : (2916, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
KMeans Adjusted Rand Index (ARI -1-1): 0.86
KMeans Normalized Mutual Information (NMI 0-1): 0.75
KMeans Fowlkes-Mallows Score (0-1): 0.96
KMeans Vmeasure (0-1): 0.75
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.88
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.75
HDBSCAN Fowlkes-Mallows Score (0-1): 0.96
HDBSCAN Vmeasure (0-1): 0.75
No description has been provided for this image
# of outliers : 23.000
Silhouette score : 0.416
Davies Bouldin Index *: 1.049
Calinski Harabasz Index: 1756.474
S_Dbw *: 0.857
SVM Classifier Cross-Validation Accuracy: 99.28%
k-NN Classifier Cross-Validation Accuracy: 96.95%
Random Forest Classifier Cross-Validation Accuracy: 97.77%
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 4ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 2ms/step
num claasees : 2
10/10 [==============================] - 0s 3ms/step
Neural network Classifier Cross-Validation Accuracy: 99.11%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\PEG-------------------- 




32/32 [==============================] - 3s 61ms/step
efficientnet extracted feature space size : (1000, 1280)
TSNE reduced efficientnet feature space shape : (1000, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=4.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): 0.04
KMeans Normalized Mutual Information (NMI 0-1): 0.03
KMeans Fowlkes-Mallows Score (0-1): 0.52
KMeans Vmeasure (0-1): 0.03
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.14
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.11
HDBSCAN Fowlkes-Mallows Score (0-1): 0.57
HDBSCAN Vmeasure (0-1): 0.11
No description has been provided for this image
# of outliers : 1.000
Silhouette score : 0.087
Davies Bouldin Index *: 3.253
Calinski Harabasz Index: 88.029
S_Dbw *: 0.959
SVM Classifier Cross-Validation Accuracy: 97.90%
k-NN Classifier Cross-Validation Accuracy: 94.10%
Random Forest Classifier Cross-Validation Accuracy: 95.60%
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 5ms/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 5ms/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 5ms/step
num claasees : 2
4/4 [==============================] - 0s 5ms/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
num claasees : 2
4/4 [==============================] - 0s 0s/step
Neural network Classifier Cross-Validation Accuracy: 95.50%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\Mattendichtung-------------------- 




175/175 [==============================] - 10s 53ms/step
efficientnet extracted feature space size : (5600, 1280)
TSNE reduced efficientnet feature space shape : (5600, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
KMeans Adjusted Rand Index (ARI -1-1): 0.98
KMeans Normalized Mutual Information (NMI 0-1): 0.96
KMeans Fowlkes-Mallows Score (0-1): 0.98
KMeans Vmeasure (0-1): 0.96
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.95
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.93
HDBSCAN Fowlkes-Mallows Score (0-1): 0.96
HDBSCAN Vmeasure (0-1): 0.93
No description has been provided for this image
# of outliers : 117.000
Silhouette score : 0.370
Davies Bouldin Index *: 1.157
Calinski Harabasz Index: 3292.280
S_Dbw *: 0.600
SVM Classifier Cross-Validation Accuracy: 99.54%
k-NN Classifier Cross-Validation Accuracy: 99.43%
Random Forest Classifier Cross-Validation Accuracy: 99.59%
num claasees : 4
18/18 [==============================] - 0s 1ms/step
num claasees : 4
18/18 [==============================] - 0s 916us/step
num claasees : 4
18/18 [==============================] - 0s 2ms/step
num claasees : 4
18/18 [==============================] - 0s 1ms/step
num claasees : 4
18/18 [==============================] - 0s 2ms/step
num claasees : 4
18/18 [==============================] - 0s 2ms/step
num claasees : 4
18/18 [==============================] - 0s 2ms/step
num claasees : 4
18/18 [==============================] - 0s 2ms/step
num claasees : 4
18/18 [==============================] - 0s 916us/step
num claasees : 4
18/18 [==============================] - 0s 2ms/step
Neural network Classifier Cross-Validation Accuracy: 99.57%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\metal_nutold-------------------- 




12/12 [==============================] - 2s 73ms/step
efficientnet extracted feature space size : (358, 1280)
TSNE reduced efficientnet feature space shape : (358, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): 0.32
KMeans Normalized Mutual Information (NMI 0-1): 0.33
KMeans Fowlkes-Mallows Score (0-1): 0.77
KMeans Vmeasure (0-1): 0.33
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.32
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.33
HDBSCAN Fowlkes-Mallows Score (0-1): 0.77
HDBSCAN Vmeasure (0-1): 0.33
No description has been provided for this image
# of outliers : 1.000
Silhouette score : 0.157
Davies Bouldin Index *: 2.555
Calinski Harabasz Index: 50.548
S_Dbw *: 1.021
SVM Classifier Cross-Validation Accuracy: 93.31%
k-NN Classifier Cross-Validation Accuracy: 82.71%
Random Forest Classifier Cross-Validation Accuracy: 90.25%
num claasees : 2
2/2 [==============================] - 0s 16ms/step
num claasees : 2
2/2 [==============================] - 0s 16ms/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
num claasees : 2
2/2 [==============================] - 0s 16ms/step
num claasees : 2
2/2 [==============================] - 0s 16ms/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
num claasees : 2
2/2 [==============================] - 0s 0s/step
Neural network Classifier Cross-Validation Accuracy: 91.09%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\screw-------------------- 




15/15 [==============================] - 2s 67ms/step
efficientnet extracted feature space size : (480, 1280)
TSNE reduced efficientnet feature space shape : (480, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): -0.01
KMeans Normalized Mutual Information (NMI 0-1): 0.02
KMeans Fowlkes-Mallows Score (0-1): 0.34
KMeans Vmeasure (0-1): 0.02
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.12
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.07
HDBSCAN Fowlkes-Mallows Score (0-1): 0.62
HDBSCAN Vmeasure (0-1): 0.07
No description has been provided for this image
# of outliers : 0.000
Silhouette score : -0.027
Davies Bouldin Index *: 6.509
Calinski Harabasz Index: 2.289
S_Dbw *: 0.962
SVM Classifier Cross-Validation Accuracy: 86.46%
k-NN Classifier Cross-Validation Accuracy: 75.42%
Random Forest Classifier Cross-Validation Accuracy: 77.92%
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 16ms/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
Neural network Classifier Cross-Validation Accuracy: 80.62%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\bottle-------------------- 




10/10 [==============================] - 2s 74ms/step
efficientnet extracted feature space size : (292, 1280)
TSNE reduced efficientnet feature space shape : (292, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): 0.26
KMeans Normalized Mutual Information (NMI 0-1): 0.38
KMeans Fowlkes-Mallows Score (0-1): 0.61
KMeans Vmeasure (0-1): 0.38
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): -0.04
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.18
HDBSCAN Fowlkes-Mallows Score (0-1): 0.54
HDBSCAN Vmeasure (0-1): 0.18
No description has been provided for this image
# of outliers : 22.000
Silhouette score : 0.240
Davies Bouldin Index *: 2.314
Calinski Harabasz Index: 32.687
S_Dbw *: 1.210
SVM Classifier Cross-Validation Accuracy: 94.52%
k-NN Classifier Cross-Validation Accuracy: 87.99%
Random Forest Classifier Cross-Validation Accuracy: 94.51%
num claasees : 4
1/1 [==============================] - 0s 52ms/step
num claasees : 4
1/1 [==============================] - 0s 47ms/step
num claasees : 4
1/1 [==============================] - 0s 47ms/step
num claasees : 4
1/1 [==============================] - 0s 47ms/step
num claasees : 4
1/1 [==============================] - 0s 63ms/step
num claasees : 4
1/1 [==============================] - 0s 65ms/step
num claasees : 4
1/1 [==============================] - 0s 63ms/step
num claasees : 4
1/1 [==============================] - 0s 47ms/step
num claasees : 4
1/1 [==============================] - 0s 53ms/step
num claasees : 4
1/1 [==============================] - 0s 63ms/step
Neural network Classifier Cross-Validation Accuracy: 93.85%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\capsule-------------------- 




11/11 [==============================] - 2s 75ms/step
efficientnet extracted feature space size : (351, 1280)
TSNE reduced efficientnet feature space shape : (351, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): 0.01
KMeans Normalized Mutual Information (NMI 0-1): 0.08
KMeans Fowlkes-Mallows Score (0-1): 0.33
KMeans Vmeasure (0-1): 0.08
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): -0.10
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.12
HDBSCAN Fowlkes-Mallows Score (0-1): 0.40
HDBSCAN Vmeasure (0-1): 0.12
No description has been provided for this image
# of outliers : 21.000
Silhouette score : 0.013
Davies Bouldin Index *: 4.394
Calinski Harabasz Index: 6.650
S_Dbw *: 1.093
SVM Classifier Cross-Validation Accuracy: 84.36%
k-NN Classifier Cross-Validation Accuracy: 69.52%
Random Forest Classifier Cross-Validation Accuracy: 75.23%
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 0s/step
num claasees : 6
2/2 [==============================] - 0s 16ms/step
num claasees : 6
2/2 [==============================] - 0s 18ms/step
Neural network Classifier Cross-Validation Accuracy: 76.06%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\cable-------------------- 




12/12 [==============================] - 2s 93ms/step
efficientnet extracted feature space size : (374, 1280)
TSNE reduced efficientnet feature space shape : (374, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=2.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): 0.05
KMeans Normalized Mutual Information (NMI 0-1): 0.23
KMeans Fowlkes-Mallows Score (0-1): 0.32
KMeans Vmeasure (0-1): 0.23
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): -0.12
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.04
HDBSCAN Fowlkes-Mallows Score (0-1): 0.65
HDBSCAN Vmeasure (0-1): 0.04
No description has been provided for this image
# of outliers : 2.000
Silhouette score : 0.009
Davies Bouldin Index *: 3.678
Calinski Harabasz Index: 6.614
S_Dbw *: 0.998
SVM Classifier Cross-Validation Accuracy: 87.70%
k-NN Classifier Cross-Validation Accuracy: 78.88%
Random Forest Classifier Cross-Validation Accuracy: 81.80%
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 16ms/step
num claasees : 9
2/2 [==============================] - 0s 6ms/step
num claasees : 9
2/2 [==============================] - 0s 16ms/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 16ms/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
num claasees : 9
2/2 [==============================] - 0s 0s/step
Neural network Classifier Cross-Validation Accuracy: 85.28%


 -------------------For datasetC:\Users\hdn7rng\Desktop\Experiments\datasets\organic\mnist-------------------- 




19/19 [==============================] - 2s 70ms/step
efficientnet extracted feature space size : (600, 1280)
TSNE reduced efficientnet feature space shape : (600, 2)
No description has been provided for this image
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1416: FutureWarning: The default value of `n_init` will change from 10 to 'auto' in 1.4. Set the value of `n_init` explicitly to suppress the warning
  super()._check_params_vs_input(X, default_n_init=10)
C:\Users\hdn7rng\.conda\envs\tf\lib\site-packages\sklearn\cluster\_kmeans.py:1440: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=3.
  warnings.warn(
KMeans Adjusted Rand Index (ARI -1-1): 0.30
KMeans Normalized Mutual Information (NMI 0-1): 0.44
KMeans Fowlkes-Mallows Score (0-1): 0.37
KMeans Vmeasure (0-1): 0.44
No description has been provided for this image
HDBSCAN Adjusted Rand Index (ARI -1-1): 0.02
HDBSCAN Normalized Mutual Information (NMI 0-1): 0.14
HDBSCAN Fowlkes-Mallows Score (0-1): 0.26
HDBSCAN Vmeasure (0-1): 0.14
No description has been provided for this image
# of outliers : 0.000
Silhouette score : 0.051
Davies Bouldin Index *: 3.541
Calinski Harabasz Index: 24.931
S_Dbw *: 0.850
SVM Classifier Cross-Validation Accuracy: 91.00%
k-NN Classifier Cross-Validation Accuracy: 78.00%
Random Forest Classifier Cross-Validation Accuracy: 84.00%
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 16ms/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 0s/step
num claasees : 10
2/2 [==============================] - 0s 16ms/step
Neural network Classifier Cross-Validation Accuracy: 84.83%
CPU times: total: 28min 39s
Wall time: 16min 1s
In [132]:
# Sanity check: ResNet results were collected for all benchmark datasets.
len(datasets_resnet)
Out[132]:
10
In [160]:
# Sanity check: EfficientNet results were collected for all benchmark datasets.
len(efficientnet_datasets)
Out[160]:
10

Storing results¶

In [161]:
import matplotlib.pyplot as plt
import numpy as np

# Intrinsic clustering quality metrics (DBI, Silhouette, CH, S_Dbw).
dbi_values = []
silhouette_values = []
ch_values = []
sdbw_values = []
# Classifier cross-validation accuracies.
svm_values = []
nn_values = []
knn_values = []
rf_values = []
# Extrinsic (label-aware) clustering metrics for HDBSCAN.
HDBSCANari_values = []
HDBSCANnmi_values = []
HDBSCANfm_values = []
HDBSCANvm_values = []
# Extrinsic (label-aware) clustering metrics for KMeans.
KMeansari_values = []
KMeansnmi_values = []
KMeansfm_values = []
KMeansvm_values = []


# Each efficientnet_datasets entry holds, per the indexing below:
#   [0] intrinsic metrics, [1] KMeans extrinsic metrics,
#   [2] classifier accuracies, [3] HDBSCAN extrinsic metrics.
# (assumed from the keys accessed here — TODO confirm against the producer cell)
for value in efficientnet_datasets.values():
    intrinsic = value[0]
    kmeans_ext = value[1]
    ml_scores = value[2]
    hdbscan_ext = value[3]

    dbi_values.append(intrinsic['DBI'])
    silhouette_values.append(intrinsic['silhouette'])
    ch_values.append(intrinsic['CH'])
    sdbw_values.append(intrinsic['sdbw'])

    svm_values.append(ml_scores['svm'])
    nn_values.append(ml_scores['nn'])
    rf_values.append(ml_scores['rf'])
    knn_values.append(ml_scores['knn'])

    # HDBSCAN extrinsic metrics.
    HDBSCANari_values.append(hdbscan_ext['HDBSCANari'])
    HDBSCANnmi_values.append(hdbscan_ext['HDBSCANnmi'])
    HDBSCANfm_values.append(hdbscan_ext['HDBSCANfm'])
    HDBSCANvm_values.append(hdbscan_ext['HDBSCANvm'])

    # KMeans extrinsic metrics.
    KMeansari_values.append(kmeans_ext['KMeansari'])
    KMeansnmi_values.append(kmeans_ext['KMeansnmi'])
    KMeansfm_values.append(kmeans_ext['KMeansfm'])
    KMeansvm_values.append(kmeans_ext['KMeansvm'])

resnet values¶

In [ ]:
 
In [202]:
import matplotlib.pyplot as plt
import numpy as np

# Intrinsic clustering quality metrics (DBI, Silhouette, CH, S_Dbw).
resnet_dbi_values = []
resnet_silhouette_values = []
resnet_ch_values = []
resnet_sdbw_values = []
# Classifier cross-validation accuracies.
resnet_svm_values = []
resnet_nn_values = []
resnet_knn_values = []
resnet_rf_values = []
# Extrinsic (label-aware) clustering metrics for HDBSCAN.
resnet_HDBSCANari_values = []
resnet_HDBSCANnmi_values = []
resnet_HDBSCANfm_values = []
resnet_HDBSCANvm_values = []
# Extrinsic (label-aware) clustering metrics for KMeans.
resnet_KMeansari_values = []
resnet_KMeansnmi_values = []
resnet_KMeansfm_values = []
resnet_KMeansvm_values = []


# Each datasets_resnet entry holds, per the indexing below:
#   [0] intrinsic metrics, [1] KMeans extrinsic metrics,
#   [2] classifier accuracies, [3] HDBSCAN extrinsic metrics.
# (assumed from the keys accessed here — TODO confirm against the producer cell)
for value in datasets_resnet.values():
    intrinsic = value[0]
    kmeans_ext = value[1]
    ml_scores = value[2]
    hdbscan_ext = value[3]

    resnet_dbi_values.append(intrinsic['DBI'])
    resnet_silhouette_values.append(intrinsic['silhouette'])
    resnet_ch_values.append(intrinsic['CH'])
    resnet_sdbw_values.append(intrinsic['sdbw'])

    resnet_svm_values.append(ml_scores['svm'])
    resnet_nn_values.append(ml_scores['nn'])
    resnet_rf_values.append(ml_scores['rf'])
    resnet_knn_values.append(ml_scores['knn'])

    # HDBSCAN extrinsic metrics.
    resnet_HDBSCANari_values.append(hdbscan_ext['HDBSCANari'])
    resnet_HDBSCANnmi_values.append(hdbscan_ext['HDBSCANnmi'])
    resnet_HDBSCANfm_values.append(hdbscan_ext['HDBSCANfm'])
    resnet_HDBSCANvm_values.append(hdbscan_ext['HDBSCANvm'])

    # KMeans extrinsic metrics.
    resnet_KMeansari_values.append(kmeans_ext['KMeansari'])
    resnet_KMeansnmi_values.append(kmeans_ext['KMeansnmi'])
    resnet_KMeansfm_values.append(kmeans_ext['KMeansfm'])
    resnet_KMeansvm_values.append(kmeans_ext['KMeansvm'])
    

Plotting ML accuracy results¶

In [163]:
# Scatter each intrinsic clustering metric against SVM accuracy,
# one panel per metric, with a first-order least-squares trend line.
plt.figure(figsize=(24, 6))

panels = [
    (dbi_values, 'DBI', 'b'),
    (silhouette_values, 'Silhouette', 'g'),
    (ch_values, 'CH', 'r'),
    (sdbw_values, 'SDBW', 'y'),
]

for pos, (metric_values, metric_name, color) in enumerate(panels, start=1):
    plt.subplot(1, 4, pos)
    plt.scatter(metric_values, svm_values, c=color, marker='o')
    plt.xlabel(metric_name)
    plt.ylabel('SVM accuracy (%)')
    plt.title(f'{metric_name} vs SVM')

    # Degree-1 polynomial fit drawn as a dashed trend line.
    trend = np.polyfit(metric_values, svm_values, 1)
    plt.plot(metric_values, np.polyval(trend, metric_values), f'{color}--', label='Trend Line')
    plt.legend()

plt.tight_layout()
plt.savefig('svm_vs_metrics', dpi=300)

plt.show()
No description has been provided for this image
In [164]:
# Scatter each intrinsic clustering metric against neural-network accuracy,
# one panel per metric, with a first-order least-squares trend line.
plt.figure(figsize=(24, 6))

panels = [
    (dbi_values, 'DBI', 'b'),
    (silhouette_values, 'Silhouette', 'g'),
    (ch_values, 'CH', 'r'),
    (sdbw_values, 'SDBW', 'y'),
]

for pos, (metric_values, metric_name, color) in enumerate(panels, start=1):
    plt.subplot(1, 4, pos)
    plt.scatter(metric_values, nn_values, c=color, marker='o')
    plt.xlabel(metric_name)
    plt.ylabel('NN accuracy (%)')
    plt.title(f'{metric_name} vs NN')

    # Degree-1 polynomial fit drawn as a dashed trend line.
    trend = np.polyfit(metric_values, nn_values, 1)
    plt.plot(metric_values, np.polyval(trend, metric_values), f'{color}--', label='Trend Line')
    plt.legend()

plt.tight_layout()
plt.savefig('nn_vs_metrics', dpi=300)

plt.show()
No description has been provided for this image
In [165]:
# Scatter each intrinsic clustering metric against Random Forest accuracy,
# one panel per metric, with a first-order least-squares trend line.
plt.figure(figsize=(24, 6))

panels = [
    (dbi_values, 'DBI', 'b'),
    (silhouette_values, 'Silhouette', 'g'),
    (ch_values, 'CH', 'r'),
    (sdbw_values, 'SDBW', 'y'),
]

for pos, (metric_values, metric_name, color) in enumerate(panels, start=1):
    plt.subplot(1, 4, pos)
    plt.scatter(metric_values, rf_values, c=color, marker='o')
    plt.xlabel(metric_name)
    plt.ylabel('RF accuracy (%)')
    plt.title(f'{metric_name} vs RF')

    # Degree-1 polynomial fit drawn as a dashed trend line.
    trend = np.polyfit(metric_values, rf_values, 1)
    plt.plot(metric_values, np.polyval(trend, metric_values), f'{color}--', label='Trend Line')
    plt.legend()

plt.tight_layout()
# Bug fix: this figure was previously saved as 'dbi_vs_metrics' (copy-paste
# slip) — renamed to match the sibling cells ('svm_vs_metrics',
# 'nn_vs_metrics', 'knn_vs_metrics').
plt.savefig('rf_vs_metrics', dpi=300)

plt.show()
No description has been provided for this image
In [166]:
# Scatter each intrinsic clustering metric against k-NN accuracy,
# one panel per metric, with a first-order least-squares trend line.
plt.figure(figsize=(24, 6))

panels = [
    (dbi_values, 'DBI', 'b'),
    (silhouette_values, 'Silhouette', 'g'),
    (ch_values, 'CH', 'r'),
    (sdbw_values, 'SDBW', 'y'),
]

for pos, (metric_values, metric_name, color) in enumerate(panels, start=1):
    plt.subplot(1, 4, pos)
    plt.scatter(metric_values, knn_values, c=color, marker='o')
    plt.xlabel(metric_name)
    plt.ylabel('KNN accuracy (%)')
    plt.title(f'{metric_name} vs KNN')

    # Degree-1 polynomial fit drawn as a dashed trend line.
    trend = np.polyfit(metric_values, knn_values, 1)
    plt.plot(metric_values, np.polyval(trend, metric_values), f'{color}--', label='Trend Line')
    plt.legend()

plt.tight_layout()
plt.savefig('knn_vs_metrics', dpi=300)
plt.show()
No description has been provided for this image

Plotting correlation coefficients¶

In [167]:
# Pearson correlation of each intrinsic clustering metric against each
# classifier's cross-validation accuracy.  np.corrcoef returns a 2x2
# correlation matrix; [0, 1] is the off-diagonal coefficient.

# SVM correlations
correlation_dbi_svm = np.corrcoef(dbi_values, svm_values)[0, 1]
correlation_silhouette_svm = np.corrcoef(silhouette_values, svm_values)[0, 1]
correlation_ch_svm = np.corrcoef(ch_values, svm_values)[0, 1]
correlation_sdbw_svm = np.corrcoef(sdbw_values, svm_values)[0, 1]

print(f"Correlation DBI vs SVM: {correlation_dbi_svm}")
print(f"Correlation Silhouette vs SVM: {correlation_silhouette_svm}")
print(f"Correlation CH vs SVM: {correlation_ch_svm}")
print(f"Correlation SDBW vs SVM: {correlation_sdbw_svm}")

# NN correlations
correlation_dbi_nn = np.corrcoef(dbi_values, nn_values)[0, 1]
correlation_silhouette_nn = np.corrcoef(silhouette_values, nn_values)[0, 1]
correlation_ch_nn = np.corrcoef(ch_values, nn_values)[0, 1]
correlation_sdbw_nn = np.corrcoef(sdbw_values, nn_values)[0, 1]

print(f"Correlation DBI vs NN: {correlation_dbi_nn}")
print(f"Correlation Silhouette vs NN: {correlation_silhouette_nn}")
print(f"Correlation CH vs NN: {correlation_ch_nn}")
# Bug fix: label previously read "vs N:N" (transposed colon).
print(f"Correlation SDBW vs NN: {correlation_sdbw_nn}")

# Random Forest correlations
correlation_dbi_rf = np.corrcoef(dbi_values, rf_values)[0, 1]
correlation_silhouette_rf = np.corrcoef(silhouette_values, rf_values)[0, 1]
correlation_ch_rf = np.corrcoef(ch_values, rf_values)[0, 1]
correlation_sdbw_rf = np.corrcoef(sdbw_values, rf_values)[0, 1]

print(f"Correlation DBI vs rf: {correlation_dbi_rf}")
print(f"Correlation Silhouette vs rf: {correlation_silhouette_rf}")
print(f"Correlation CH vs rf: {correlation_ch_rf}")
# Bug fix: added the colon missing from the original label.
print(f"Correlation SDBW vs rf: {correlation_sdbw_rf}")

# k-NN correlations
correlation_dbi_knn = np.corrcoef(dbi_values, knn_values)[0, 1]
correlation_silhouette_knn = np.corrcoef(silhouette_values, knn_values)[0, 1]
correlation_ch_knn = np.corrcoef(ch_values, knn_values)[0, 1]
correlation_sdbw_knn = np.corrcoef(sdbw_values, knn_values)[0, 1]

print(f"Correlation DBI vs knn: {correlation_dbi_knn}")
print(f"Correlation Silhouette vs knn: {correlation_silhouette_knn}")
print(f"Correlation CH vs knn: {correlation_ch_knn}")
# Bug fix: added the colon missing from the original label.
print(f"Correlation SDBW vs knn: {correlation_sdbw_knn}")
Correlation DBI vs SVM: -0.798167740763308
Correlation Silhouette vs SVM: 0.7593030065912305
Correlation CH vs SVM: 0.5753005166994785
Correlation SDBW vs SVM: -0.45841777210158086
Correlation DBI vs NN: -0.8136248441591379
Correlation Silhouette vs NN: 0.7725360057820152
Correlation CH vs NN: 0.5693771051365971
Correlation SDBW vs N:N -0.4048873655068264
Correlation DBI vs rf: -0.8191437551835935
Correlation Silhouette vs rf: 0.7688587524353746
Correlation CH vs rf: 0.5504489804979751
Correlation SDBW vs rf -0.3712995108129758
Correlation DBI vs knn: -0.7462832734835978
Correlation Silhouette vs knn: 0.7281508696141904
Correlation CH vs knn: 0.6103429605699575
Correlation SDBW vs knn -0.4734902235873424

HDBSCAN Extrinsic metrics¶

In [121]:
# HDB metrics
# Create scatter plots with trend lines
plt.figure(figsize=(24, 6))

# ARI vs SVM
plt.subplot(141)
plt.scatter(HDBSCANari_values, svm_values, c='b', marker='o')
plt.xlabel('Adjusted Rand Index')
plt.ylabel('SVM accuracy (%)')
plt.title('HDBSCAN ARI vs SVM')

HDBSCANari_fit = np.polyfit(HDBSCANari_values, svm_values, 1)
plt.plot(HDBSCANari_values, np.polyval(HDBSCANari_fit, HDBSCANari_values), 'b--', label='Trend Line')
plt.legend()

# nmi vs SVM
plt.subplot(142)
plt.scatter(HDBSCANnmi_values, svm_values, c='g', marker='o')
plt.xlabel('Normalized Mutual Index')
plt.ylabel('SVM accuracy (%)')
plt.title('HBSCAN NMI vs SVM')

HDBSCANnmi_fit = np.polyfit(HDBSCANnmi_values, svm_values, 1)
plt.plot(HDBSCANnmi_values, np.polyval(HDBSCANnmi_fit, HDBSCANnmi_values), 'g--', label='Trend Line')
plt.legend()

# HDBSCANfm vs SVM
plt.subplot(143)
plt.scatter(HDBSCANfm_values, svm_values, c='r', marker='o')
plt.xlabel('Fowlkes-Mallows')
plt.ylabel('SVM accuracy (%)')
plt.title('HDBSCAN FM vs SVM')

HDBSCANfm_fit = np.polyfit(HDBSCANfm_values, svm_values, 1)
plt.plot(HDBSCANfm_values, np.polyval(HDBSCANfm_fit, HDBSCANfm_values), 'r--', label='Trend Line')
plt.legend()

# HDBSCANvm vs SVM
plt.subplot(144)
plt.scatter(HDBSCANvm_values, svm_values, c='y', marker='o')
plt.xlabel('V-measure')
plt.ylabel('SVM accuracy (%)')
plt.title('HDBSCAN VM vs SVM')

HDBSCANvm_fit = np.polyfit(HDBSCANvm_values, svm_values, 1)
plt.plot(HDBSCANvm_values, np.polyval(HDBSCANvm_fit, HDBSCANvm_values), 'y--', label='Trend Line')
plt.legend()

plt.tight_layout()
plt.savefig('HDBSCAN_metrics', dpi=300)

plt.show()
No description has been provided for this image

KMEANS metrics¶

In [122]:
# HDB metrics
# Create scatter plots with trend lines
plt.figure(figsize=(24, 6))

# ARI vs SVM
plt.subplot(141)
plt.scatter(KMeansari_values, svm_values, c='b', marker='o')
plt.xlabel('Adjusted Rand Index')
plt.ylabel('SVM accuracy (%)')
plt.title('KMeans ARI vs SVM')

KMeansari_fit = np.polyfit(KMeansari_values, svm_values, 1)
plt.plot(KMeansari_values, np.polyval(KMeansari_fit, KMeansari_values), 'b--', label='Trend Line')
plt.legend()

# nmi vs SVM
plt.subplot(142)
plt.scatter(KMeansnmi_values, svm_values, c='g', marker='o')
plt.xlabel('Normalized Mutual Index')
plt.ylabel('SVM accuracy (%)')
plt.title('KMeans NMI vs SVM')

KMeansnmi_fit = np.polyfit(KMeansnmi_values, svm_values, 1)
plt.plot(KMeansnmi_values, np.polyval(KMeansnmi_fit, KMeansnmi_values), 'g--', label='Trend Line')
plt.legend()

# Kmeansfm vs SVM
plt.subplot(143)
plt.scatter(KMeansfm_values, svm_values, c='r', marker='o')
plt.xlabel('Fowlkes-Mallows')
plt.ylabel('SVM accuracy (%)')
plt.title('Kmeans FM vs SVM')

KMeansfm_fit = np.polyfit(KMeansfm_values, svm_values, 1)
plt.plot(KMeansfm_values, np.polyval(KMeansfm_fit, KMeansfm_values), 'r--', label='Trend Line')
plt.legend()

# Kmeansvm vs SVM
plt.subplot(144)
plt.scatter(KMeansvm_values, svm_values, c='y', marker='o')
plt.xlabel('V-measure')
plt.ylabel('SVM accuracy (%)')
plt.title('KMeans VM vs SVM')

KMeansvm_fit = np.polyfit(KMeansvm_values, svm_values, 1)
plt.plot(KMeansvm_values, np.polyval(KMeansvm_fit, KMeansvm_values), 'y--', label='Trend Line')
plt.legend()

plt.tight_layout()
plt.savefig('KMeans_metrics', dpi=300)

plt.show()
No description has been provided for this image
In [123]:
# Calculate DBSCAN correlation coefficients
correlation_HDBSCANari = np.corrcoef(HDBSCANari_values, svm_values)[0, 1]
correlation_HDBSCANnmi = np.corrcoef(HDBSCANnmi_values, svm_values)[0, 1]
correlation_HDBSCANfm = np.corrcoef(HDBSCANfm_values, svm_values)[0, 1]
correlation_HDBSCANvm = np.corrcoef(HDBSCANvm_values, svm_values)[0, 1]

print(f"Correlation DBSCAN vs ari: {correlation_HDBSCANari}")
print(f"Correlation DBSCAN vs nmi: {correlation_HDBSCANnmi}")
print(f"Correlation DBSCAN vs fm: {correlation_HDBSCANfm}")
print(f"Correlation DBSCAN vs vm: {correlation_HDBSCANvm}")


# Calculate KMeans correlation coefficients
correlation_KMeansari = np.corrcoef(KMeansari_values, svm_values)[0, 1]
correlation_KMeansnmi = np.corrcoef(KMeansnmi_values, svm_values)[0, 1]
correlation_KMeansfm = np.corrcoef(KMeansfm_values, svm_values)[0, 1]
correlation_KMeansvm = np.corrcoef(KMeansvm_values, svm_values)[0, 1]

print(f"CorrelationKMeans vs ari: {correlation_KMeansari}")
print(f"CorrelationKMeans vs nmi: {correlation_KMeansnmi}")
print(f"CorrelationKMeans vs fm: {correlation_KMeansfm}")
print(f"CorrelationKMeans vs vm: {correlation_KMeansvm}")
Correlation DBSCAN vs ari: 0.5695347540556673
Correlation DBSCAN vs nmi: 0.6273229488070051
Correlation DBSCAN vs fm: 0.49300322912035316
Correlation DBSCAN vs vm: 0.6273229488070051
CorrelationKMeans vs ari: 0.7701772796984184
CorrelationKMeans vs nmi: 0.7378874033350974
CorrelationKMeans vs fm: 0.8288608770481086
CorrelationKMeans vs vm: 0.7378874033350972

RESNET VS EFFICIENTNET¶

In [218]:
# Last component of the Windows dataset path is its display name (e.g. 'mnist').
str(dataset.dataset_path).split('\\')[-1]
Out[218]:
'mnist'
In [ ]:

In [285]:
import matplotlib.pyplot as plt

# Collect the directory name of each dataset (presumably DataLoader instances
# whose dataset_path is a pathlib.Path — see DataLoader.__init__).
dataset_names = []
for dataset in datasets_resnet:
    # Path.name is the final component; the original split str(path) on a
    # hard-coded '\\', which breaks on POSIX-style paths.
    dataset_names.append(dataset.dataset_path.name)
def compare_nets(efficientnet_values, resnet_values, metric='Metric', save=False):
    """Plot a line chart comparing one metric of ResNet50 vs EfficientNetB0.

    Parameters
    ----------
    efficientnet_values, resnet_values : sequence of float
        Per-dataset metric values, aligned with the hard-coded `datasets`
        labels below (10 entries each).
    metric : str
        Label for the y-axis, title, and output file name.
    save : bool
        If True, write '<metric>_resnet_vs_efficientnet.png' at 300 dpi.
        The original default was the *string* 'False', which is truthy, so
        the figure was saved even when the caller expected the default of
        "don't save"; a real boolean fixes that.
    """
    # Dataset labels — must match the order of the value sequences.
    datasets = ['CatDog','ASQMM','PEG','Mattendichtung','MetalNut','Screw','Bottle','Capsule','Cable','MNIST']

    plt.figure(figsize=(10, 6))
    plt.plot(datasets, resnet_values, marker='o', label='ResNet50', linestyle='-', color='blue')
    plt.plot(datasets, efficientnet_values, marker='s', label='EfficientNetB0', linestyle='--', color='green')

    plt.xlabel('Datasets')
    plt.ylabel(f'{metric}')
    # NOTE(review): "(lower is better)" is hard-coded; only true for metrics
    # such as S_Dbw — confirm before reusing for higher-is-better metrics.
    plt.title(f'{metric} comparison between ResNet50 and EfficientNetB0 (lower is better)')
    plt.legend()

    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    if save:
        plt.savefig(f'{metric}_resnet_vs_efficientnet.png', dpi=300)
    plt.show()

# Compare the S_Dbw clustering metric across backbones (no file is written).
metric = 'S_Dbw'
efficientnet_values = sdbw_values
resnet_values = resnet_sdbw_values
compare_nets(efficientnet_values, resnet_values, metric=metric, save=False)
No description has been provided for this image
In [266]:
# Print the S_Dbw score of the first run for each EfficientNet dataset.
# The dict key was unused, so iterate over values() directly.
for value in efficientnet_datasets.values():
    print(value[0]['sdbw'])
0.9431370556193436
0.8565630287350994
0.9587223308921301
0.6000322771071182
1.0205056979817397
0.9622661682242984
1.2102381195394367
1.092981590351097
0.9977684455924868
0.8503743728543772
In [256]:
# Print the silhouette score of the first run for each ResNet dataset.
# The dict key was unused, so iterate over values() directly.
for value in datasets_resnet.values():
    print(value[0]['silhouette'])
0.09028799
0.40238285
0.052925892
0.31745523
0.19109415
-0.04304799
0.19306804
-0.022640416
-0.02170907
0.06452677

Complex graph¶

In [417]:
def complex_graph(efficientnet_metric, resnet_metric, efficientnet_performance, resnet_performance, model='ML model', metric='Metric', save=False):
    """Overlay a backbone-metric line chart with SVM-accuracy bars.

    Lines (left y-axis) show the clustering metric per dataset for ResNet50
    and EfficientNetB0; translucent bars (right y-axis) show the downstream
    SVM accuracy for the same datasets.

    Parameters
    ----------
    efficientnet_metric, resnet_metric : sequence of float
        Per-dataset metric values, aligned with the hard-coded `datasets`.
    efficientnet_performance, resnet_performance : sequence of float
        Per-dataset SVM accuracies in percent (bars, right axis).
    model : str
        Name of the downstream classifier; currently unused in the plot
        (kept for interface compatibility).
    metric : str
        Label for the left y-axis, title, and output file name.
        BUG FIX: the original default was ``metric=metric``, which froze
        whatever the *global* `metric` held when the function was defined,
        silently mislabelling later charts — callers should pass it explicitly.
    save : bool
        If True, write '<metric>_complex_graph.png' at 300 dpi.
    """
    # Dataset labels — must match the order of all four value sequences.
    datasets = ['CatDog','ASQMM','PEG','Mattendichtung','MetalNut','Screw','Bottle','Capsule','Cable','MNIST']
    fig, ax1 = plt.subplots(figsize=(12, 6))

    plt.title(f'{metric} comparison between ResNet50 and EfficientNetB0 (higher is better)')

    # Primary axis: metric lines.
    ax1.plot(datasets, resnet_metric, marker='o', label='ResNet50', linestyle='-', color='blue')
    ax1.plot(datasets, efficientnet_metric, marker='s', label='EfficientNetB0', linestyle='--', color='green')
    ax1.set_xlabel('Datasets')
    ax1.set_ylabel(f'{metric}', color='black')
    ax1.tick_params(axis='y', labelcolor='black')
    ax1.legend(loc='lower left')
    ax1.grid(True)
    ax1.tick_params(axis='x', rotation=45)

    # Secondary axis: paired accuracy bars, offset so they sit side by side.
    bar_width = 0.16
    num_datasets = np.arange(len(datasets))
    ax2 = ax1.twinx()
    ax2.bar(num_datasets - bar_width / 2, resnet_performance, width=bar_width, alpha=0.1, label='ResNet50', color='blue')
    # Label capitalization fixed to match 'EfficientNetB0' used elsewhere.
    ax2.bar(num_datasets + bar_width / 2, efficientnet_performance, width=bar_width, alpha=0.1, label='EfficientNetB0', color='green')

    ax2.set_ylabel('SVM accuracy', color='black')
    ax2.tick_params(axis='y', labelcolor='black')
    ax2.legend(loc='upper right')

    # Clip the accuracy axis so small differences remain visible.
    ax2.set_ylim([70, 100])

    if save:
        plt.savefig(f'{metric}_complex_graph.png', bbox_inches='tight', dpi=300)
    plt.show()
    

metric = 'Silhouette score'
efficientnet_values = silhouette_values
resnet_values = resnet_silhouette_values
efficientnet_performance = svm_values
resnet_performance = resnet_svm_values

# Pass `metric` explicitly: relying on complex_graph's default would title the
# chart with whatever the global `metric` held when the function was defined
# ('S_Dbw' from an earlier cell), not 'Silhouette score'.
complex_graph(efficientnet_values, resnet_values, efficientnet_performance,
              resnet_performance, model='SVM', metric=metric, save=True)
No description has been provided for this image
In [318]:
efficientnet_performance
Out[318]:
[99.05,
 99.2795273737231,
 97.9,
 99.53571428571429,
 93.30952380952381,
 86.45833333333334,
 94.51724137931035,
 84.35714285714285,
 87.69559032716927,
 90.99999999999999]
In [320]:
resnet_performance
Out[320]:
[98.85,
 99.41674904674483,
 97.39999999999999,
 99.46428571428572,
 93.03174603174604,
 81.66666666666667,
 93.13793103448275,
 81.76984126984127,
 87.40398293029872,
 92.16666666666666]
In [324]:
import matplotlib.pyplot as plt
import numpy as np

# Toy demonstration data for the two panels below.
datasets = ["Dataset 1", "Dataset 2", "Dataset 3", "Dataset 4", "Dataset 5"]
resnet_accuracy = [0.85, 0.88, 0.92, 0.78, 0.91]
efficientnet_accuracy = [0.87, 0.89, 0.91, 0.75, 0.92]
resnet_speed = [10, 12, 15, 8, 9]
efficientnet_speed = [8, 9, 10, 11, 12]

# Two vertically stacked panels: accuracy lines on top, speed bars below.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

# Top panel: accuracy comparison as line plots.
ax1.plot(datasets, resnet_accuracy, marker='o', label='ResNet Accuracy', linestyle='-', color='blue')
ax1.plot(datasets, efficientnet_accuracy, marker='s', label='EfficientNet Accuracy', linestyle='--', color='green')
ax1.set(xlabel='Datasets', ylabel='Accuracy', title='Accuracy Comparison')
ax1.legend()
ax1.grid(True)
ax1.tick_params(axis='x', rotation=45)

# Bottom panel: semi-transparent speed bars drawn over each other.
ax2.bar(datasets, resnet_speed, alpha=0.5, label='ResNet Speed', color='orange')
ax2.bar(datasets, efficientnet_speed, alpha=0.5, label='EfficientNet Speed', color='red')
ax2.set(xlabel='Datasets', ylabel='Speed', title='Speed Comparison')
ax2.legend()
ax2.grid(True)
ax2.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()
No description has been provided for this image